* Session setup. The live part of this file starts from the saved panel
* data_temporal (see the `use` command after the disabled block below).
clear
set more off

* The block comment below is DISABLED code. When enabled, it runs
* preliminaries_2024, applies the sample filters, builds the treatment
* indicators (treated, treated_post), the weeks-since-merger variable
* (merger_Deltat_weaks), the sample_6w flag and treated_time, and then saves
* the result as data_temporal. It is kept so the panel can be rebuilt.
/*

***THIS PART OF THE CODE PROCESSES THE RAW DATA
do preliminaries_2024

keep if pubscore_normal~=.
keep if teamsize<3
gen missing_dates = 0
replace missing_dates = 1 if (requestdate1==. | requestdate2==.)
replace missing_dates = 0 if teamsize~=2
drop if missing_dates==1
drop if rewardquantity<5000
*drop Competitions without teams of size 2 or formation date for those teams:
drop if competitionid == 2445 | competitionid == 2467 | competitionid == 2479 | competitionid == 2863

*identifying treated teams
gen treated = 0
replace treated = 1 if teamsize==2
gen treated_post = 0
replace treated_post = 1 if treated==1 & submissiondate>=merger_time


bys competitionid: egen last_merger_t = max(merger_t)

*weeks since merger
gen merger_Deltat = submissiondate-merger_time if treated==1
replace merger_Deltat = merger_Deltat + 1 if merger_Deltat>=0 & treated==1
gen merger_Deltat_weaks = floor(merger_Deltat/7 - 0.1)+1

*restrict sample to 6 weeks before and after the week of the merger
scalar minweek = -5
scalar maxweek = 7
gen sample_6w = 1
replace sample_6w = 0 if treated==1 & merger_Deltat_weaks<`=minweek'
replace sample_6w = 0 if treated==1 & merger_Deltat_weaks>`=maxweek'
tab merger_Deltat_weaks
replace merger_Deltat_weaks = 0 if treated==0
gen treated_time = treated*merger_Deltat_weaks


save data_temporal, replace
*/
* Load the processed panel that every table and figure below starts from.
clear
set more off
use data_temporal



***************************
********************TABLES
***************************


**************Table 3
* Regressions of the normalized public score on the post-formation indicator
* with compday and teamid fixed effects, clustered by teamid.
* Column p1 uses the estimation sample as loaded; column p1b repeats the same
* specification after keeping only rows that match match_id_2024 on teamid.
log using Output/Table3.txt, text replace

* ---- Panel A ----
reghdfe pubscore_normal treated_post x2_* if sample_6w==1 & teamsize<3, ///
    absorb(compday teamid) vce(cluster teamid)
estimates store p1
* Disabled private-score counterpart of Panel A:
*   reghdfe priscore_normal treated_post x2_* if teamsize<3 & sample_6w==1, absorb(compday teamid) vce(cluster teamid)
*   estimates store p2

* ---- Panel B ----
* The join happens inside preserve/restore, so the matched-only subsample
* does not leak into later sections.
preserve
count
joinby teamid using match_id_2024, unmatched(master)
tabulate _merge
drop if _merge != 3
drop _merge

reghdfe pubscore_normal treated_post x2_* if sample_6w==1 & teamsize<3, ///
    absorb(compday teamid) vce(cluster teamid)
estimates store p1b
* Disabled private-score counterpart of Panel B:
*   reghdfe priscore_normal treated_post x2_* if teamsize<3 & sample_6w==1, absorb(compday teamid) vce(cluster teamid)
*   estimates store p2b

* Public-score columns side by side (the p2 / p2b table is disabled together
* with the private-score regressions above).
esttab p1 p1b, b(%8.3f) se(%8.3f) r2(%8.3f) tex label ///
    keep(treated_post) order( ) ///
    title("Team-level effects: OLS regressions") ///
    star(* 0.1 ** 0.05 *** 0.01)

restore
log close

**************Table A.6
* User-level regressions (submitteduserid fixed effects) of the normalized
* public score on treated_post, with and without a selection-correction term
* (mills) built from competition-by-competition probits.
*   p1  : no mills term, full sample
*   p1b : no mills term, restricted to the sample used by p2
*   p2  : with mills term
preserve

* Keep only competitions that contain at least one treated observation.
bys competitionid: egen meantreated=mean(treated)
drop if meantreated==0
drop meantreated

* Mills ratio phi(xb)/Phi(xb) from a separate probit per competition.
* The prediction is only computed when the probit for THAT competition
* succeeded. Previously every step was wrapped in -capture-, so a failed
* probit let -predict- silently reuse the estimates left in memory by the
* previous competition and write a stale ratio into mills. Competitions whose
* probit fails now keep mills missing and therefore drop out of p2 / p1b.
gen mills = .
tempvar xb_l
levelsof competitionid, local(levels)
foreach l of local levels {
	capture probit treated_post userentry_predeadline x2_* if competitionid==`l', r
	local probit_rc = _rc
	if `probit_rc' == 0 {
		* linear index for this competition's rows only
		predict `xb_l' if competitionid==`l', xb
		* normal() is the documented name of the legacy normprob() function
		replace mills = exp(-.5*`xb_l'^2)/(sqrt(2*_pi)*normal(`xb_l')) if competitionid==`l'
		drop `xb_l'
	}
	else {
		display as text "Table A.6: probit failed for competitionid `l' (rc=`probit_rc'); mills left missing"
	}
}

log using Output/TableA6.txt, replace text
reghdfe pubscore_normal treated_post mills x2_*  if teamsize<3 & sample_6w==1,  absorb(compday submitteduserid) vce(cluster teamid)
estimates store p2
* flag the estimation sample of the mills specification for column p1b
gen as = 1 if e(sample)==1
reghdfe pubscore_normal treated_post  x2_*  if teamsize<3 & sample_6w==1,  absorb(compday submitteduserid) vce(cluster teamid)
estimates store p1
reghdfe pubscore_normal treated_post  x2_*  if teamsize<3 & sample_6w==1 & as==1,  absorb(compday submitteduserid) vce(cluster teamid)
estimates store p1b
/*
reghdfe priscore_normal treated_post mills x2_*  if teamsize<3 & sample_6w==1,  absorb(compday submitteduserid) vce(cluster teamid)
estimates store p4
gen bs = 1 if e(sample)==1

reghdfe priscore_normal treated_post  x2_*  if teamsize<3 & sample_6w==1,  absorb(compday submitteduserid) vce(cluster teamid)
estimates store p3
reghdfe priscore_normal treated_post  x2_*  if teamsize<3 & sample_6w==1 & bs==1,  absorb(compday submitteduserid) vce(cluster teamid)
estimates store p3b
*/
esttab p1 p1b p2  , b(%8.3f) se(%8.3f) r2(%8.3f) tex label keep( treated_post mills) title("User-level effects: OLS/Heckman regressions") order(  ) star(* 0.1 ** 0.05 *** 0.01) 
log close
restore


**************Table A1
* Summary statistics and robust-SE mean comparisons (by treated) on the match
* list, after keeping the lowest-norm row per mid-by-treated cell, requiring
* norm below .25, and requiring at least two remaining rows per mid.
log using Output/TableA1.txt, text replace
preserve
use match_list_2024, clear
tabulate treated

* lowest-norm row within each (mid, treated) cell
bysort mid treated (norm): keep if _n == 1
* distance threshold on the match
keep if norm < .25
* discard mids left with fewer than two rows
bysort mid: drop if _N < 2

tabstat subnum_jt diff_max_pubscore , by(treated)

regress subnum_jt treated, vce(robust)
regress diff_max_pubscore treated, vce(robust)

tabulate treated
restore
log close

**************Table 4
* Builds a team-by-week panel of submission counts (weeks 1-12 counted from
* the first pre-deadline submission date in each competition) and regresses
* the weekly count on treated_post with competition-week and team fixed
* effects. Panel A uses all teams; Panel B keeps only teams that match
* match_id_2024. Everything runs inside preserve/restore.

preserve
*number of submissions over time for merging teams
* aux is dropped first, so a variable with that name must already exist in
* the loaded data (otherwise this command errors).
drop aux
* submission date, kept only for rows flagged as not after the deadline
gen aux = submissiondate if isafterdeadline=="False"
bys competitionid: egen lastsubdate = max(aux)
bys competitionid: egen firstsubdate = min(aux)
gen submissiondate_delta = submissiondate - firstsubdate
* Week index: deltas from about 0.7 up to 7.7 map to week 1, the next 7 to
* week 2, and so on.
* NOTE(review): a delta of exactly 0 gives floor(-0.1)+1 = 0, so those rows
* are removed by the >=1 filter below - confirm this is intended.
gen submissiondate_week = floor(submissiondate_delta/7 - 0.1)+1
replace submissiondate_week=. if submissiondate<firstsubdate | submissiondate>lastsubdate

* rows with a missing week fail the <=12 condition and are dropped as well
keep if submissiondate_week>=1 & submissiondate_week<=12
gen subs = 1
* one row per team-week: number of submissions, plus the minimum of the
* treatment indicators within the cell
collapse (sum) subs (min) treated_post treated, by(teamid competitionid submissiondate_week)

* fillin adds a row for every teamid x week combination that is absent;
* all other variables are missing on the added rows
fillin teamid submissiondate_week
* Zero-fill added weeks that follow a week with a non-missing count. The
* replace runs row by row within team, so a zero written on one row lets the
* next added row be filled too; weeks before the team's first observed week
* stay missing.
bys teamid (submissiondate_week): replace subs = 0 if subs==. & subs[_n-1]~=.
* restore competitionid on the added rows from the team's observed rows
bys teamid: egen aux = max(competitionid)
replace competitionid = aux 
drop aux

* carry treated_post forward onto added rows (treated is not carried forward)
bys teamid (submissiondate_week): replace treated_post = treated_post[_n-1] if treated_post==. & treated_post[_n-1]~=. & _n>1


* competition-by-week fixed-effect identifier
egen compweek=group(competitionid submissiondate_week)
gen pos_subs = subs>0
gen aux = pos_subs*submissiondate_week
bys teamid (submissiondate_week): egen maxtime = max(aux)
drop aux
* subs0: same as subs but missing after maxtime; only used by the disabled
* regressions below
gen subs0 = subs
replace subs0 = . if submissiondate_week>maxtime

log using Output/Table4.txt, replace text

*Panel A
reghdfe subs treated_post  , absorb(compweek teamid) vce(cluster teamid) 
estimates store p1
*reghdfe subs0 treated_post  , absorb(compweek teamid) vce(cluster teamid) 
*estimates store p2

*esttab p1 p2  , b(%8.3f) se(%8.3f) r2(%8.3f) tex label keep( treated_post) title("Number of submissions effects: OLS regressions") order(  ) star(* 0.1 ** 0.05 *** 0.01) 

*Panel B
* keep only team-weeks whose teamid appears in match_id_2024
count
joinby teamid using match_id_2024, unmatched(master)
tab _merge
keep if _merge==3
drop _merge

reghdfe subs treated_post  , absorb(compweek teamid) vce(cluster teamid) 
estimates store p1b
*reghdfe subs0 treated_post  , absorb(compweek teamid) vce(cluster teamid) 
*estimates store p2b

esttab p1 p1b   , b(%8.3f) se(%8.3f) r2(%8.3f) tex label keep( treated_post) title("Number of submissions effects: OLS regressions") order(  ) star(* 0.1 ** 0.05 *** 0.01) 

log close
restore


*********Table A3
* Heterogeneity of the treated_post effect on the normalized public score:
* interactions with data_image, an above-mean reward indicator, a post-2015
* indicator and an above-mean data-size indicator. Means are taken over one
* row per competition (k_j==1).
preserve
*competition covariates
joinby competitionid using competition_covariates2024, unmatched(master)
tab _merge
drop _merge

* Stata treats missing as larger than any number, so a bare "x > c" codes rows
* with missing x as 1. Because the join above keeps unmatched master rows,
* covariates can be missing; the -if !missing()- guards leave the indicator
* missing in that case instead of silently classifying the row as
* "large" / "post-2015". Rows with complete covariates are unaffected.
gen yearenabled=year(enabled_aux)
gen year_post2015 = yearenabled>2015 if !missing(yearenabled)
*data_image 
* one flagged row per competition, used for competition-level means
bys competitionid: gen k_j = 1 if _n==1
quietly: sum rewardquantity if k_j==1
gen largereward = rewardquantity>`=r(mean)' if !missing(rewardquantity)
quietly: sum total_datasize if k_j==1
gen largedata = total_datasize>`=r(mean)' if !missing(total_datasize)


log using Output/TableA3.txt, replace text

*interaction terms
gen treated_post_image = treated_post*data_image 
gen treated_largereward = treated_post*largereward 
gen treated_post2015 = treated_post*year_post2015 
gen treated_largedata = treated_post*largedata 

*poly2: scores
reghdfe pubscore_normal treated_post x2_* if teamsize<3 & sample_6w==1, absorb(compday teamid) vce(cluster teamid)
estimates store p1
reghdfe pubscore_normal treated_post treated_post_image x2_* if teamsize<3 & sample_6w==1, absorb(compday teamid) vce(cluster teamid)
estimates store p1b
reghdfe pubscore_normal treated_post treated_largereward x2_* if teamsize<3 & sample_6w==1, absorb(compday teamid) vce(cluster teamid)
estimates store p1c
reghdfe pubscore_normal treated_post treated_post2015 x2_* if teamsize<3 & sample_6w==1, absorb(compday teamid) vce(cluster teamid)
estimates store p1d
reghdfe pubscore_normal treated_post treated_largedata x2_* if teamsize<3 & sample_6w==1, absorb(compday teamid) vce(cluster teamid)
estimates store p1e

/*
reghdfe priscore_normal treated_post x2_* if teamsize<3 & sample_6w==1, absorb(compday teamid) vce(cluster teamid)
estimates store p2
reghdfe priscore_normal treated_post treated_post_image x2_* if teamsize<3 & sample_6w==1, absorb(compday teamid) vce(cluster teamid)
estimates store p2b
reghdfe priscore_normal treated_post treated_largereward x2_* if teamsize<3 & sample_6w==1, absorb(compday teamid) vce(cluster teamid)
estimates store p2c
reghdfe priscore_normal treated_post treated_post2015 x2_* if teamsize<3 & sample_6w==1, absorb(compday teamid) vce(cluster teamid)
estimates store p2d
reghdfe priscore_normal treated_post treated_largedata x2_* if teamsize<3 & sample_6w==1, absorb(compday teamid) vce(cluster teamid)
estimates store p2e
*/
esttab p1 p1b p1c p1d p1e   , b(%8.3f) se(%8.3f) r2(%8.3f) tex label keep( treated_*) title("Team-level effects: OLS regressions") order(  ) star(* 0.1 ** 0.05 *** 0.01) 
*esttab p2 p2b p2c p2d p2e   , b(%8.3f) se(%8.3f) r2(%8.3f) tex label keep( treated_*) title("Team-level effects: OLS regressions") order(  ) star(* 0.1 ** 0.05 *** 0.01) 

log close
restore



***************************
********************FIGURES
***************************

**************Figure 2


*figures with matching
* Event-study plot of the normalized public score around team formation,
* estimated on rows that match match_id_2024; exported as Figure2B.pdf.
preserve
scalar minweek = -5
scalar maxweek = 7
* drop treated rows outside the [minweek, maxweek] window
drop if treated==1 & merger_Deltat_weaks<`=minweek'
drop if treated==1 & merger_Deltat_weaks>`=maxweek'
* Shift treated weeks so the window becomes 1..13 (week - minweek + 1), then
* recode the shifted value 6 (original week 0) to 0. Untreated rows are also
* set to 0 below, so level 0 of treated_time pools them with original week 0.
replace merger_Deltat_weaks = merger_Deltat_weaks - `=minweek' + 1 if treated==1
replace merger_Deltat_weaks = 0 if merger_Deltat_weaks==(-`=minweek' + 1)
tab merger_Deltat_weaks
replace merger_Deltat_weaks = 0 if treated==0
* rebuild treated_time from the recoded week variable
drop treated_time
gen treated_time = treated*merger_Deltat_weaks

* keep only rows whose teamid appears in match_id_2024
count
joinby teamid using match_id_2024, unmatched(master)
tab _merge
keep if _merge==3
drop _merge

reghdfe pubscore_normal i.treated_time  x2_*  , absorb(compday teamid) vce(cluster teamid)
* quick coefplot of the week dummies; it is replaced by the twoway graph
* below before anything is exported
coefplot,  keep(*.treated_time) vertical xline(5.9, lp(dash)) yline(0, lp(dash)) graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ytitle("Score differences ", size(large)) ylabel(,nogrid labsize(large)) xlabel(1 "-6 weeks" 5.9 "Team formation" 13 "7 weeks", nogrid labsize(large))  scheme(sj)

* Copy coefficients and their variances (diagonal of e(V)) into the first
* rows of the dataset as variables mod_beta1 and mod_V1.
mat mod_beta = e(b)'
mat mod_V= (vecdiag(e(V)))'
svmat mod_beta
svmat mod_V
* Only the first 13 rows are plotted - presumably the level-0 entry plus the
* 12 week dummies, which assumes every week level is present in the sample
* (TODO confirm).
replace mod_beta1 = . if _n>13
* Row-to-x-axis mapping produced by the next four commands:
*   rows 2-6 -> -6..-2,  row 1 -> -1,  rows 7-13 -> 0..6
gen mod_time = _n -8
replace mod_time = . if _n>13

replace mod_time = -1 if _n==1
replace mod_time = mod_time+1 if _n>=7
* pointwise intervals using the 1.96 normal critical value
gen conf_low = mod_beta1 - 1.96*sqrt(mod_V1)
gen conf_high = mod_beta1 + 1.96*sqrt(mod_V1)
two (scatter mod_beta1 mod_time ) ///
        (rcap conf_low conf_high mod_time ), legend(off) ylabel(-.2(.1).2) yscale(range(-.21 .21)) xscale(range(-6 6.5)) xline(-0.1, lp(dash) lcolor(gs13)) yline(0, lp(dash) lcolor(gs13)) graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) xtitle("") ytitle("Score differences", size(large)) ylabel(,nogrid labsize(large)) xlabel(-6 "-6 weeks" 0 "Team formation" 6 "+7 weeks", nogrid labsize(large))  scheme(sj)
graph export Output/Figure2B.pdf, as(pdf)  replace  
* helper variables are removed; restore then brings back the preserved data
drop mod_beta1- conf_high	
restore


*figures without matching
* Event-study plot of the normalized public score around team formation,
* estimated on the data as loaded (no join to match_id_2024); exported as
* Figure2A.pdf.
preserve
scalar minweek = -5
scalar maxweek = 7
* drop treated rows outside the [minweek, maxweek] window
drop if treated==1 & merger_Deltat_weaks<`=minweek'
drop if treated==1 & merger_Deltat_weaks>`=maxweek'
* Shift treated weeks so the window becomes 1..13 (week - minweek + 1), then
* recode the shifted value 6 (original week 0) to 0. Untreated rows are also
* set to 0 below, so level 0 of treated_time pools them with original week 0.
replace merger_Deltat_weaks = merger_Deltat_weaks - `=minweek' + 1 if treated==1
replace merger_Deltat_weaks = 0 if merger_Deltat_weaks==(-`=minweek' + 1)
tab merger_Deltat_weaks
replace merger_Deltat_weaks = 0 if treated==0
* rebuild treated_time from the recoded week variable
drop treated_time
gen treated_time = treated*merger_Deltat_weaks



reghdfe pubscore_normal i.treated_time  x2_*  , absorb(compday teamid) vce(cluster teamid)
* quick coefplot of the week dummies; it is replaced by the twoway graph
* below before anything is exported
coefplot,  keep(*.treated_time) vertical xline(5.9, lp(dash)) yline(0, lp(dash)) graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ytitle("Score differences ", size(large)) ylabel(,nogrid labsize(large)) xlabel(1 "-6 weeks" 5.9 "Team formation" 13 "7 weeks", nogrid labsize(large))  scheme(sj)

* Copy coefficients and their variances (diagonal of e(V)) into the first
* rows of the dataset as variables mod_beta1 and mod_V1.
mat mod_beta = e(b)'
mat mod_V= (vecdiag(e(V)))'
svmat mod_beta
svmat mod_V
* Only the first 13 rows are plotted - presumably the level-0 entry plus the
* 12 week dummies, which assumes every week level is present in the sample
* (TODO confirm).
replace mod_beta1 = . if _n>13
* Row-to-x-axis mapping produced by the next four commands:
*   rows 2-6 -> -6..-2,  row 1 -> -1,  rows 7-13 -> 0..6
gen mod_time = _n -8
replace mod_time = . if _n>13

replace mod_time = -1 if _n==1
replace mod_time = mod_time+1 if _n>=7
* pointwise intervals using the 1.96 normal critical value
gen conf_low = mod_beta1 - 1.96*sqrt(mod_V1)
gen conf_high = mod_beta1 + 1.96*sqrt(mod_V1)
two (scatter mod_beta1 mod_time ) ///
        (rcap conf_low conf_high mod_time ), legend(off) ylabel(-.2(.1).2) yscale(range(-.21 .21)) xscale(range(-6 6.5)) xline(-0.1, lp(dash) lcolor(gs13)) yline(0, lp(dash) lcolor(gs13)) graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) xtitle("") ytitle("Score differences", size(large)) ylabel(,nogrid labsize(large)) xlabel(-6 "-6 weeks" 0 "Team formation" 6 "+7 weeks", nogrid labsize(large))  scheme(sj)
graph export Output/Figure2A.pdf, as(pdf)  replace  
* helper variables are removed; restore then brings back the preserved data
drop mod_beta1- conf_high	
restore

**************Figure 3
* Histograms of within-match differences (second minus first row of each mid
* when sorted by treated) in private leaderboard rank (3A) and in the number
* of submissions relative to the second row, in percent (3B). A vertical line
* marks the mean of the plotted variable.
preserve
count
joinby teamid using match_id_identifiers_2024, unmatched(master)
tabulate _merge
drop if _merge != 3
drop _merge

* collapse to one row per team within a match
generate subs = 1
collapse (sum) subs                      ///
         (max) pubscore_normal           ///
         (min) privateleaderboardrank    ///
         (max) treated,                  ///
         by(teamid mid)

* within-match differences, ordered by treated
sort mid treated
bysort mid (treated): generate diff          = pubscore_normal[2] - pubscore_normal[1]
bysort mid (treated): generate diff_rank     = privateleaderboardrank[2] - privateleaderboardrank[1]
bysort mid (treated): generate diff_subs     = subs[2] - subs[1]
bysort mid (treated): generate diff_subs_rel = (subs[2] - subs[1])/subs[2]

* one row per match
bysort mid: drop if _n > 1

** Figure 3A
* bin heights (h) and bin centres (x) for |diff_rank| below 1000
twoway__histogram_gen diff_rank if abs(diff_rank)<1000, freq bin(100) gen(h x, replace)
summarize diff_rank
scalar mean_pos = r(mean)
summarize h
* the mean marker reaches 10% above the tallest bar
scalar line_top = r(max)*1.1
twoway (bar h x, color(gray) lcolor(black) barwidth(20))                          ///
       (pci 0 `=mean_pos' `=line_top' `=mean_pos', color(blue)),                  ///
       xtick(`=mean_pos') text(`=line_top-10' `=mean_pos-90'  "mean")             ///
       xtitle("Ranking difference", size(large)) ytitle("Frequency", size(large)) ///
       ylabel(,nogrid labsize(large)) xlabel(,nogrid labsize(large))              ///
       legend(off) graphregion(color(white))
graph export Output/Figure3A.pdf, as(pdf) replace


** Figure 3B
* same construction for the relative submission difference, in percent
generate diff_subs_rel2 = diff_subs_rel*100
twoway__histogram_gen diff_subs_rel2  if abs(diff_subs_rel2)<1000, freq bin(300) gen(h x, replace)
summarize diff_subs_rel2
scalar mean_pos = r(mean)
summarize h
scalar line_top = r(max)*1.1
twoway (bar h x if x>-359 & x<=100, color(gray) lcolor(black) barwidth(4))        ///
       (pci 0 `=mean_pos' `=line_top' `=mean_pos', color(blue)),                  ///
       xtick(`=mean_pos') text(`=line_top-10' `=mean_pos-25'  "mean")             ///
       xtitle("Number of Submission (percentage difference)", size(large))        ///
       ytitle("Frequency", size(large))                                           ///
       ylabel(,nogrid labsize(large)) xlabel(,nogrid labsize(large))              ///
       legend(off) graphregion(color(white))
graph export Output/Figure3B.pdf, as(pdf) replace

restore

*************Figure A5
* Scatter of the two members' distance to the competition leader, measured at
* each member's first post-formation submission, for treated teams with
* exactly two members.

preserve
* Running maxima of the normalized private score in id order: per user within
* a competition (highest_user) and over the whole competition
* (highest_overall).
* The former "if _n>1" guard skipped the first row of each group, so the first
* submission's score never entered either running maximum. max() ignores
* missing arguments and [_n-1] is missing on the first row, so no guard is
* needed: on row 1 the maximum is simply that row's own score.
gen highest_user = .
bys competitionid submitteduserid (id): replace highest_user = max(highest_user[_n-1], priscore_normal)
gen highest_overall = .
bys competitionid  (id): replace highest_overall = max(highest_overall[_n-1], priscore_normal)
gen distance = highest_overall - highest_user
* number of earlier submissions by the same user in the competition
bys competitionid submitteduserid (id): gen past_sub_user = _n-1


* first post-formation submission of each user in a treated team
keep if treated == 1
keep if treated_post == 1
bys submitteduserid (id) : keep if _n==1
keep teamid submitteduserid highest_user distance past_sub_user
* number the members within a team and keep teams with exactly two of them
bys teamid: gen k = _n
bys teamid: gen J = _N
keep if J==2
drop submitteduserid J
* one row per team; variables get suffix 1 / 2 for the two members
reshape wide highest_user distance past_sub_user, i(teamid) j(k)

* The first plot is displayed but then replaced by the second one (restricted
* to distances below 1); only the second is in memory when graph export runs.
two (scatter distance1 distance2 if past_sub_user1>5 & past_sub_user2>5) (lfit distance1 distance2 if past_sub_user1>5 & past_sub_user2>5) ,  graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ytitle("Member 1's distance to leader", size(large)) xtitle("Member 2's distance to leader", size(large)) ylabel(,nogrid labsize(large)) xlabel(,nogrid labsize(large))  scheme(sj) legend(off)
two (scatter distance1 distance2 if distance1<1 & distance2<1 & past_sub_user1>5 & past_sub_user2>5) (lfit distance1 distance2 if distance1<1 & distance2<1 & past_sub_user1>5 & past_sub_user2>5) ,  graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ytitle("Member 1's distance to leader", size(large)) xtitle("Member 2's distance to leader", size(large)) ylabel(,nogrid labsize(large)) xlabel(,nogrid labsize(large)) scheme(sj) legend(off)
graph export Output/FigureA5.pdf, replace

restore

**Figure A1
* Empirical CDFs, by treated, of public scores, private scores and days
* between submissions, using only submissions dated before the match's team
* formation date.

* Step 1: one row per matched team, with the match-level formation date
use data_temporal, clear
count
joinby teamid using match_id_identifiers_2024, unmatched(master)
tabulate _merge
drop if _merge != 3
drop _merge
generate subs = 1
collapse (sum) subs                                          ///
         (max) pubscore_normal                               ///
         (min) privateleaderboardrank merger_time enddate    ///
         (max) treated,                                      ///
         by(competitionid teamid mid)
bysort mid: egen team_formation_date = min(merger_time)

keep teamid competitionid mid treated team_formation_date
save match_list_2024_JL_responsereferee.dta, replace



* Step 2: run the preliminaries script and attach the match list to the data
* it leaves in memory
do preliminaries_2024

joinby teamid competitionid using match_list_2024_JL_responsereferee, unmatched(using)

* submissions strictly before the team formation date of the match
keep if submissiondate < team_formation_date
keep teamid competitionid mid treated pubscore_normal priscore_normal ///
     subnum_jt daysbetween_jt max_pubscore_j max_pubscore_jt diff_max_pubscore

* Step 3: the three CDF panels
cdfplot pubscore_normal if pubscore_normal>-4, by(treated)   ///
    legend(order(1 "Solo Player" 2 "Team")) scheme(s2mono)   ///
    graphregion(color(white)) xtitle("Public Scores")
graph export Output/FigureA1A.pdf, as(pdf) replace

cdfplot priscore_normal if priscore_normal>-4, by(treated)   ///
    legend(order(1 "Solo Player" 2 "Team")) scheme(s2mono)   ///
    graphregion(color(white)) xtitle("Private Scores")
graph export Output/FigureA1B.pdf, as(pdf) replace

cdfplot daysbetween_jt if daysbetween_jt<30, by(treated)     ///
    legend(order(1 "Solo Player" 2 "Team")) scheme(s2mono)   ///
    graphregion(color(white)) xtitle("Days Between Submissions")
graph export Output/FigureA1C.pdf, as(pdf) replace

* remove the intermediate match list written in step 1
erase match_list_2024_JL_responsereferee.dta

**********Table A2 & Figure A2
* Histogram of team formation time relative to the merger deadline
* (Figure A2), and regressions of the within-match differences on the
* relative time of team formation with competition fixed effects (Table A2).

* ---- match-level outcome differences ----
use data_temporal, clear
count
joinby teamid using match_id_identifiers_2024, unmatched(master)
tabulate _merge
drop if _merge != 3
drop _merge
generate subs = 1
collapse (sum) subs                                          ///
         (max) pubscore_normal                               ///
         (min) privateleaderboardrank merger_time enddate    ///
         (max) treated,                                      ///
         by(competitionid teamid mid)
bysort mid: egen team_formation_date = min(merger_time)
* second minus first row of each match when ordered by treated
bysort mid (treated): generate diff          = pubscore_normal[2] - pubscore_normal[1]
bysort mid (treated): generate diff_rank     = privateleaderboardrank[2] - privateleaderboardrank[1]
bysort mid (treated): generate diff_subs     = subs[2] - subs[1]
bysort mid (treated): generate diff_subs_rel = (subs[2] - subs[1])/subs[2]
bysort mid: drop if _n > 1
save aux_JL.dta, replace

* ---- competition start date, deadline and length in days ----
use competition_covariates2024.dta, clear
generate deadline_dayaux    = date(substr(deadlinedate,1,10), "MDY")
generate dateenabled_dayaux = date(substr(enableddate,1,10), "MDY")
generate length_days        = deadline_dayaux - dateenabled_dayaux
keep competitionid deadline_dayaux dateenabled_dayaux length_days

save aux_comp_dates_JL.dta, replace

* ---- relative timing of team formation ----
use aux_JL, clear
joinby competitionid using aux_comp_dates_JL, unmatched(master)
drop _merge
* share of the competition elapsed when the team formed
generate team_formation_t = (team_formation_date - dateenabled_dayaux)/length_days

joinby competitionid using aux_merger_deadlines.dta, unmatched(master)
drop _merge
* same idea, measured against the team-merger deadline instead of the end date
generate teamformation_relative_deadline = ///
    1 - (teammergerdeadlinedate - team_formation_date)/(teammergerdeadlinedate - dateenabled_dayaux)



* keep only teams that also appear in data_temporal, one row per team
joinby teamid using data_temporal, unmatched(master)
tabulate _merge
drop if _merge != 3
drop _merge

keep teamid competitionid diff diff_rank diff_subs diff_subs_rel ///
     team_formation_t teamformation_relative_deadline
bysort teamid: drop if _n > 1

histogram teamformation_relative_deadline if teamformation_relative_deadline<=1, ///
    percent scheme(s1mono)                                                       ///
    xtitle("Team Formation Time (before deadline)", size(large))                 ///
    ytitle("Percent", size(large))                                               ///
    ylabel(,nogrid labsize(large)) xlabel(,nogrid labsize(large))
graph export Output/figure_A2.pdf, as(pdf) replace

* disabled alternative:
*   cdfplot teamformation_relative_deadline if teamformation_relative_deadline<=1  scheme(s1mono)

log using Output/TableA2.txt, text replace
reghdfe diff_subs_rel team_formation_t if team_formation_t<1, ///
    absorb(competitionid) vce(robust)
estimates store R1
reghdfe diff_rank team_formation_t if team_formation_t<1, ///
    absorb(competitionid) vce(robust)
estimates store R2
esttab R1 R2, b(%8.3f) se(%8.3f) r2(%8.3f) tex label ///
    title("Impact of Time of Team Formation")         ///
    star(* 0.1 ** 0.05 *** 0.01)
log close

* remove the intermediate files written above
erase aux_comp_dates_JL.dta
erase aux_JL.dta
